Groundhog Day Predictions

A quick exploration of how often each groundhog saw its shadow, using the TidyTuesday Groundhog Day predictions data.
analysis
descriptive statistics
Author

Alex Wainwright

Published

January 31, 2024

# Libraries -------------------

library(data.table)
library(flextable)
library(stringr)
library(tidytuesdayR)
# Fetch the TidyTuesday release for 2024-01-30; the result is indexed below
# with `$predictions` and `$groundhogs`.
tuesdata <- tidytuesdayR::tt_load("2024-01-30")
--- Compiling #TidyTuesday Information for 2024-01-30 ----
--- There are 2 files available ---
--- Starting Download ---

    Downloading file 1 of 2: `predictions.csv`
    Downloading file 2 of 2: `groundhogs.csv`
--- Download complete ---
# Shadow proportions ------------

# Predictions table as a data.table; only `id` and `shadow` are used here.
groundhog_predictions <-
  tuesdata$predictions |>
  as.data.table()

# Percentage (0-100, rounded to two decimals) of each groundhog's predictions
# in which it saw its shadow. Missing `shadow` values are dropped before
# averaging, so an id with no non-missing values yields NaN.
groundhog_prediction_shadow_proportions <-
  groundhog_predictions[
    ,
    .(proportion_shadow = round(mean(shadow == TRUE, na.rm = TRUE) * 100, 2)),
    by = id
  ]

# Groundhog metadata (name, location, image URL, ...) as a data.table.
groundhog_details <-
  tuesdata$groundhogs |>
  as.data.table()

# Join on `id`: one output row per row of the proportions table, with the
# matching groundhog metadata attached.
groundhog_details <-
  groundhog_details[groundhog_prediction_shadow_proportions, on = "id"]
# Download images ---------------

# Cache every groundhog's picture under `groundhog_images/`, named
# `<slug>.jpeg` so the files line up with the `image_file_path` column built
# from `slug` for the table below.
groundhog_image_dir <- "groundhog_images"

# download.file() does not create missing directories, so make sure the target
# exists before the first run.
dir.create(groundhog_image_dir, showWarnings = FALSE, recursive = TRUE)

groundhog_image_paths <-
  file.path(groundhog_image_dir, paste0(groundhog_details$slug, ".jpeg"))

# Fetch only what is not on disk yet, so an interrupted run can be resumed
# instead of leaving a permanently half-filled cache.
images_to_fetch <-
  which(!is.na(groundhog_details$image) & !file.exists(groundhog_image_paths))

# A plain loop (rather than lapply) keeps the download status codes out of the
# rendered document.
for (k in images_to_fetch) {
  download.file(
    groundhog_details$image[k],
    destfile = groundhog_image_paths[k],
    mode = "wb" # binary mode: text mode corrupts images on Windows
  )
}

# Table -------------------------

# Local path of each groundhog's cached image, derived from its slug.
groundhog_details[
  ,
  image_file_path := paste0("groundhog_images/", slug, ".jpeg")
]

# Highest shadow percentage first (sorted in place).
setorderv(groundhog_details, cols = "proportion_shadow", order = -1L)

flextable(
  groundhog_details,
  col_keys = c(
    "image_file_path", "name", "city", "region", "country",
    "proportion_shadow"
  )
) |>
  set_header_labels(
    values = c(
      "image_file_path" = "",
      "name" = "Name",
      "city" = "City",
      "region" = "Region",
      "country" = "Country",
      "proportion_shadow" = "Saw its Shadow (%)"
    )
  ) |>
  # No row selector: leaving `i` unset applies the formatting to every body
  # row, so the table keeps working when the number of groundhogs changes.
  # Columns are addressed by name rather than position for the same reason.
  colformat_image(
    j = "image_file_path",
    width = 1,
    height = 1
  ) |>
  align(
    j = "proportion_shadow",
    align = "center"
  ) |>
  autofit()

Name

City

Region

Country

Saw its Shadow (%)

Buffalo Bert

Buffalo

New York

USA

100.00

Bowman Bill

Stephens City

Virginia

USA

90.91

Scramble the Duck

Eastford

Connecticut

USA

88.89

Punxsutawney Phil

Punxsutawney

Pennsylvania

USA

84.38

Chuck Wood

Quincy

California

USA

83.33

Lucy the Lobster

Barrington

Nova Scotia

Canada

80.00

Octoraro Orphie

Quarryville

Pennsylvania

USA

79.12

Uni the Groundhog

Myerstown

Pennsylvania

USA

78.95

Schnogadahl Sammi

Kresgeville

Pennsylvania

USA

77.27

Potomac Phil

Washington DC

District of Columbia

USA

71.43

Boise Bill

Boise

Idaho

USA

70.00

Sir Walter Wally

Raleigh

North Carolina

USA

69.57

Poor Richard

York

Pennsylvania

USA

66.67

Snerd

Garner

North Carolina

USA

66.67

Wildwood Willie

Toledo

Ohio

USA

66.67

Hope the Groundhog

Hope

Indiana

USA

66.67

Flatiron Freddy

Boulder

Colorado

USA

64.29

Mount Joy Minnie

Mount Joy

Pennsylvania

USA

63.64

Oil Springs Ollie

Oil Springs

Ontario

Canada

62.50

Fred la Marmotte

Val-d’Espoir

Quebec

Canada

61.54

Snohomish Slew

Snohomish

Washington

USA

57.14

Van Island Violet

Nanaimo

British Columbia

Canada

57.14

Stormy Marmot

Aurora

Colorado

USA

56.25

Woody the Woodchuck

Howell

Michigan

USA

56.00

Tinicum Tim

Philadelphia

Pennsylvania

USA

55.56

Holtsville Hal

Holtsville

New York

USA

52.00

Malverne Mel

Malverne

New York

USA

50.00

Ms. G

Lincoln

Massachusetts

USA

50.00

Patty Pagoda

Reading

Pennsylvania

USA

50.00

Benny the Bass

Buckeye Lake

Ohio

USA

50.00

Concord Casimir

Concord

Ohio

USA

50.00

Poppy the Groundhog

Millersville

Pennsylvania

USA

50.00

Stumptown Fil

Portland

Oregon

USA

50.00

Heaven’s Wildlife Harvey

Oil Springs

Ontario

Canada

50.00

Lady Edwina of Essex

West Orange

New Jersey

USA

50.00

Pisgah Penny

Brevard

North Carolina

USA

50.00

Unadilla Billie

Unadilla

Nebraska

USA

50.00

Grover and Sue

Pine Grove

Pennsylvania

USA

47.06

Dover Doug

Dover Township

Pennsylvania

USA

45.45

Fenwick Flossie

Pelham

Ontario

Canada

45.45

Lawrenceville Lucy

Lawrenceville

Pennsylvania

USA

45.45

Shubenacadie Sam

Shubenacadie

Nova Scotia

Canada

44.00

Two Rivers Tunnel

Cape Breton

Nova Scotia

Canada

42.86

Lander Lil

Lander

Wyoming

USA

42.31

Manitoba Merv

Stonewall

Manitoba

Canada

42.31

Jimmy the Groundhog

Sun Prairie

Wisconsin

USA

41.82

Wiarton Willie

Wiarton

Ontario

Canada

41.67

Woodstock Willie

Woodstock

Illinois

USA

41.67

Concord Charlie

Athens

West Virginia

USA

40.00

Chuckles the Groundhog

Manchester

Connecticut

USA

40.00

Gordy the Groundhog

Milwaukee

Wisconsin

USA

40.00

Balzac Billy

Balzac

Alberta

Canada

36.84

French Creek Freddie

Upshur County

West Virginia

USA

34.29

Gertie the Groundhog

Hanna City

Illinois

USA

33.33

Chesapeake Chuck

Newport News

Virginia

USA

33.33

Snowy the Prairie Dog

Pend Oreille County

Washington

USA

33.33

MT Parker

Lancaster

Pennsylvania

USA

33.33

Dunkirk Dave

Dunkirk

New York

USA

32.14

Chilly Charlie

Woodstock

Ontario

Canada

30.77

Buckeye Chuck

Marion

Ohio

USA

30.00

Polk County Paula

Des Moines

Iowa

USA

30.00

Smith Lake Jake

Smith Lake

Alabama

USA

28.57

Stonewall Jackson

Wantage

New Jersey

USA

26.67

Harleysville Hank

Harleysville

Pennsylvania

USA

25.00

Yonah the Groundhog

Cleveland

Georgia

USA

25.00

Bee Cave Bob

Bee Cave

Texas

USA

23.08

Staten Island Chuck

New York City

New York

USA

17.86

Sand Mountain Sam

Albertville

Alabama

USA

16.67

General Beauregard Lee

Jackson

Georgia

USA

10.00

Beardsley Bart

Bridgeport

Connecticut

USA

8.33

Middlemiss Mike

Melbourne

Ontario

Canada

7.69

Cluxatawney Henrietta

Katonah

New York

USA

0.00

Mount Gretna Grady

Mount Gretna

Pennsylvania

USA

0.00

Okanagan Okie

Vernon

British Columbia

Canada

0.00

Queen City Charlie

Cumberland

Maryland

USA

0.00